{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "from lxml import etree, objectify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "tree = etree.parse(\"test.xml\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CNML'"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree.docinfo.root_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['2019-02-27T01:34:28+08:00',\n",
       " '新华社',\n",
       " '新媒体专线（图片）',\n",
       " '0030',\n",
       " '中文对外专线',\n",
       " '0101',\n",
       " '新媒体专线（图片）',\n",
       " '0030',\n",
       " '中文对外专线',\n",
       " '0101',\n",
       " '0227AF73',\n",
       " 'urn:CNML:xinhua.org:20190227:XxjpseC007183_20190227_PEPFN1:1',\n",
       " 'xinhua.org',\n",
       " '20190227',\n",
       " 'XxjpseC007183_20190227_PEPFN1',\n",
       " '1',\n",
       " 'Usable',\n",
       " '2019-02-27T08:27:28+08:00',\n",
       " '2019-02-27T09:33:50+08:00',\n",
       " '（新华视界）（1）网球——迪拜赛：锦织圭首轮晋级',\n",
       " '马哈茂德·哈立德',\n",
       " 'Ma Hamaode·halide',\n",
       " '2019-02-26T17:06:10+08:00',\n",
       " '2019-02-27T09:33:50+08:00',\n",
       " '国际新闻',\n",
       " '体育',\n",
       " '迪拜-网球-锦织圭,体育',\n",
       " '1579729',\n",
       " '3200',\n",
       " '2133',\n",
       " '300 dots per inch',\n",
       " 'PEA00127047',\n",
       " '马哈茂德·哈立德',\n",
       " '迪拜',\n",
       " '新华社照片，迪拜，2019年2月27日\\n    （新华视界）（1）网球——迪拜赛：锦织圭首轮晋级\\n    2月26日，锦织圭在比赛中发球。\\n    当日，在2019年ATP迪拜网球赛单打首轮比赛中，日本选手锦织圭以2比0战胜法国选手佩尔，晋级下一轮。\\n    新华社发（马哈茂德·哈立德摄）']"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only text nodes that contain something other than whitespace;\n",
    "# a plain //text() also returns every indentation-only node between elements.\n",
    "tree.xpath('//text()[normalize-space()]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "root = tree.getroot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Element.getchildren() is deprecated in lxml; list(element) returns the same direct children.\n",
    "childs = list(root)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = childs[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Element.getchildren() is deprecated in lxml; list(element) returns the same direct children.\n",
    "# Note: this rebinds `childs` from the root's children to the children of `c`.\n",
    "childs = list(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<Element {http://www.cnml.org.cn/2005/CNMLSchema}SentTo at 0x7f98111145c8>"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e = childs[2]\n",
    "# A bare assignment displays nothing; echo the element so the cell yields its stored result.\n",
    "e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Children of the first grandchild of `e`. Indexing and list() replace the deprecated\n",
    "# getchildren() chain; an empty list means that node is a leaf.\n",
    "list(e[0][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xml.dom.minidom import parse\n",
    "import xml.dom.minidom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [],
   "source": [
    "DOMTree = xml.dom.minidom.parse(\"test.xml\")\n",
    "collection = DOMTree.documentElement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [],
   "source": [
    "e = collection.getElementsByTagName(\"NameTopic\")[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'新华社'"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e.getElementsByTagName('Name')[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [],
   "source": [
    "t= collection.getElementsByTagName(\"TransferTime\")[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import timedelta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2019-02-27 09:34:28'"
      ]
     },
     "execution_count": 135,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bind the shifted timestamp to `dt`: the next cell calls dt.strftime(...) and `dt` is not\n",
    "# assigned anywhere else, so without this the notebook fails on a fresh kernel.\n",
    "# NOTE(review): the format string only matches the literal offset '+08:00', and 8 hours are\n",
    "# then added on top of it; presumably this compensates for how the feed stamps TransferTime. Confirm.\n",
    "dt = datetime.strptime(t, '%Y-%m-%dT%H:%M:%S+08:00') + timedelta(hours=8)\n",
    "dt.strftime('%Y-%m-%d %H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2019-02-27 09:34:28'"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dt.strftime('%Y-%m-%d %H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'（新华视界）（1）网球——迪拜赛：锦织圭首轮晋级'"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collection.getElementsByTagName(\"HeadLine\")[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'迪拜'"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collection.getElementsByTagName(\"Location\")[0].getElementsByTagName('Name')[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['国际新闻', '体育']"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stripped text of every Name element under the first SubjectCodes element.\n",
    "[\n",
    "    name_node.childNodes[0].data.strip()\n",
    "    for name_node in collection.getElementsByTagName(\"SubjectCodes\")[0].getElementsByTagName('Name')\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'迪拜-网球-锦织圭,体育'"
      ]
     },
     "execution_count": 165,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['体育']"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = '体育'\n",
    "a.split(',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'新华社照片，迪拜，2019年2月27日\\n    （新华视界）（1）网球——迪拜赛：锦织圭首轮晋级\\n    2月26日，锦织圭在比赛中发球。\\n    当日，在2019年ATP迪拜网球赛单打首轮比赛中，日本选手锦织圭以2比0战胜法国选手佩尔，晋级下一轮。\\n    新华社发（马哈茂德·哈立德摄）'"
      ]
     },
     "execution_count": 174,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collection.getElementsByTagName(\"DataContent\")[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bound the wait (requests has no default timeout) and fail fast on an HTTP error status,\n",
    "# so an error page is never handed to the XML parser in the cells below.\n",
    "r = requests.get(\"http://172.16.8.65:8180/s/content/download/p/736796/view\", timeout=10)\n",
    "r.raise_for_status()"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 181,
   "metadata": {},
   "outputs": [],
   "source": [
    "DOMTree = xml.dom.minidom.parseString(r.content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 182,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection = DOMTree.documentElement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 183,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'（图表·漫画）[时政]专项行动'"
      ]
     },
     "execution_count": 183,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collection.getElementsByTagName(\"HeadLine\")[0].childNodes[0].data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['国内新闻', '社会']"
      ]
     },
     "execution_count": 184,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Stripped text of every Name element under the first SubjectCodes element.\n",
    "[\n",
    "    name_node.childNodes[0].data.strip()\n",
    "    for name_node in collection.getElementsByTagName(\"SubjectCodes\")[0].getElementsByTagName('Name')\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
